import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the Baidu realtime hot-search board and save each entry's title and
# search index to a CSV file.
url = 'https://top.baidu.com/board?tab=realtime'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

# Send the request. A timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the hot-search board.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')  # parse the page

# Collect the title of every hot-search entry ('#' markers are stripped).
titles = soup.select('.c-single-text-ellipsis')
list1 = [i.get_text().strip().replace('#', '') for i in titles]
# NOTE(review): link_list is never populated in this script; it is kept only
# so that any later code referencing the name keeps working.
link_list = []

# Collect the search index shown for every entry.
searches = soup.select('.hot-index_1Bl1a')
list2 = [j.get_text().strip() for j in searches]

# Both columns are matched purely by position, so they must be the same
# length. Fail with an explicit message instead of pandas' generic
# "All arrays must be of the same length" error.
if len(list1) != len(list2):
    raise ValueError(
        f'Selector mismatch: found {len(list1)} titles but {len(list2)} '
        'search indexes; the page layout may have changed.'
    )

# Save to a CSV file. utf-8-sig adds a BOM so Excel detects the encoding;
# index=False omits the DataFrame index column.
data_dict = {'标题': list1, '搜索指数': list2}
df = pd.DataFrame(data_dict)
df.to_csv('百度热搜榜.csv', index=False, encoding='utf-8-sig')